packages¶

  • Author: Zhixin Li
In [2]:
options(stringsAsFactors = F)
library(Seurat)
library(RColorBrewer)
library(ggplot2)
library(cowplot)
library(dplyr)
library(patchwork)
Attaching SeuratObject


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘patchwork’


The following object is masked from ‘package:cowplot’:

    align_plots


In [3]:
RColorBrewer::brewer.pal(9, "Set1")
RColorBrewer::brewer.pal(8, "Set2")
  1. '#E41A1C'
  2. '#377EB8'
  3. '#4DAF4A'
  4. '#984EA3'
  5. '#FF7F00'
  6. '#FFFF33'
  7. '#A65628'
  8. '#F781BF'
  9. '#999999'
  1. '#66C2A5'
  2. '#FC8D62'
  3. '#8DA0CB'
  4. '#E78AC3'
  5. '#A6D854'
  6. '#FFD92F'
  7. '#E5C494'
  8. '#B3B3B3'
In [4]:
# final 16 clusters of CNCC
library(RColorBrewer)
myColors <- c(brewer.pal(9,"Set1")[c(1:5,7:9)], brewer.pal(8,"Set2")[1:3], brewer.pal(12,"Set3")[c(3:10)])[1:16]
In [5]:
getwd()
'/Volumes/HOME/EllyLab/MacMini/ellyLab-CPOS/2021_Wen_Chen_heart_CNCC'

read 10x¶

In [8]:
SRA.meta <- read.csv("../public/2021_Chen/SraRunTable.csv")
In [6]:
colnames(SRA.meta)
  1. 'Run'
  2. 'Assay.Type'
  3. 'AvgSpotLen'
  4. 'Bases'
  5. 'BioProject'
  6. 'BioSample'
  7. 'BioSampleModel'
  8. 'Bytes'
  9. 'Center.Name'
  10. 'Consent'
  11. 'DATASTORE.filetype'
  12. 'DATASTORE.provider'
  13. 'DATASTORE.region'
  14. 'dev_stage'
  15. 'Experiment'
  16. 'Instrument'
  17. 'Library.Name'
  18. 'LibraryLayout'
  19. 'LibrarySelection'
  20. 'LibrarySource'
  21. 'Organism'
  22. 'Platform'
  23. 'ReleaseDate'
  24. 'Sample.Name'
  25. 'sex'
  26. 'SRA.Study'
  27. 'strain'
  28. 'tissue'
In [9]:
SRA.meta <- SRA.meta[,c("Run","dev_stage")]
In [10]:
SRA.meta
A data.frame: 8 × 2
Rundev_stage
<chr><chr>
SRR10065158E10.5
SRR10065151P7
SRR10065152P1
SRR10065153E17.5
SRR10065154E14.5
SRR10065155E13.5
SRR10065156E12.5
SRR10065157E11.5
In [17]:
raw.count.list <- list()
for (i in 1:nrow(SRA.meta)) {
    print(i)
    tmp.sra <- SRA.meta[i,"Run"]
    tmp.stage <- SRA.meta[i,"dev_stage"]
    tmp.report.path <- paste("../public/2021_Chen/done/", tmp.sra, "_report/outs/filtered_feature_bc_matrix", sep = "")
    tmp.rawdata <- Read10X(tmp.report.path)
    colnames(tmp.rawdata) <- paste(tmp.stage, colnames(tmp.rawdata), sep="_")
    raw.count.list[[tmp.stage]] <- tmp.rawdata
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
In [18]:
# tmp.sra
# tmp.stage
In [21]:
dim(raw.count.list[[1]])
  1. 31053
  2. 2955
In [22]:
save(raw.count.list, file = "2021_Chen.raw.count.list.Rdata")

merge¶

In [24]:
cellMeta <- data.frame()
In [28]:
for (i in names(raw.count.list)) {
    print(i)
    tmp.meta <- data.frame(stage=i, cellName=colnames(raw.count.list[[i]]))
    cellMeta <- rbind(cellMeta, tmp.meta)
}
[1] "E10.5"
[1] "P7"
[1] "P1"
[1] "E17.5"
[1] "E14.5"
[1] "E13.5"
[1] "E12.5"
[1] "E11.5"
In [27]:
exprMat <- cbind(raw.count.list[[1]], raw.count.list[[2]], raw.count.list[[3]], raw.count.list[[4]],
                 raw.count.list[[5]], raw.count.list[[6]], raw.count.list[[7]], raw.count.list[[8]])
In [31]:
save(exprMat, cellMeta, file = "dowloaded.exprMatrix.metaInfo.Rdata")
In [49]:
print(load("dowloaded.exprMatrix.metaInfo.Rdata"))
[1] "exprMat"  "cellMeta"
In [50]:
cellMeta$lab <- "zhou"

Vcl¶

In [51]:
# # removed datasets
# print(load("E13.5_CNCC_merged_updated.Rdata"))
In [52]:
print(load("all_CNCC_merged.Rdata"))
[1] "seuset"   "all_tsne" "markers" 
In [54]:
# vcl.raw.count <- seuset@assays$RNA@counts
In [55]:
vcl.raw.count <- seuset@raw.data
In [56]:
dim(vcl.raw.count)
  1. 16342
  2. 8611
In [57]:
vcl.cellMeta <- data.frame(stage="E13.5", cellName=colnames(vcl.raw.count), lab="Elly")
In [60]:
vcl.cellMeta[grep("ct1", vcl.cellMeta$cellName),]$stage <- "E14.0"
In [61]:
dim(exprMat)
  1. 31053
  2. 44364
In [62]:
# gene.anno.mm10.v3 <- read.csv("https://github.com/leezx/Toolsets/raw/master/data/gene.anno.mm10.3.0.0.csv", sep = ";", header = F)
# gene.anno.mm10.v3 <- gene.anno.mm10.v3[,c("V1","V3","V4","V7","V9","V11")]
# colnames(gene.anno.mm10.v3) <- c("chr", "start", "end", "gene_id", "gene_name", "gene_biotype")
# save(gene.anno.mm10.v3, file = "gene.anno.mm10.v3.rda")
# gene.anno.GRCh38.v3 <- read.csv("https://github.com/leezx/Toolsets/raw/master/data/gene.anno.GRCh38.3.0.0.csv", sep = ";", header = F)
# gene.anno.GRCh38.v3 <- gene.anno.GRCh38.v3[,c("V1","V3","V4","V7","V9","V11")]
# colnames(gene.anno.GRCh38.v3) <- c("chr", "start", "end", "gene_id", "gene_name", "gene_biotype")
# save(gene.anno.GRCh38.v3, file = "gene.anno.GRCh38.v3.rda")
In [63]:
print(load("gene.anno.mm10.v3.rda"))
print(load("gene.anno.GRCh38.v3.rda"))
[1] "gene.anno.mm10.v3"
[1] "gene.anno.GRCh38.v3"
In [66]:
# go to iterbi package
vcl.raw.count.full <- add.missing.genes(vcl.raw.count, all.genes = unique(gene.anno.mm10.v3$gene_name))
In [67]:
exprMat.full <- add.missing.genes(exprMat, all.genes = unique(gene.anno.mm10.v3$gene_name))
In [68]:
dim(vcl.raw.count.full)
  1. 31017
  2. 8611
In [69]:
dim(exprMat.full)
  1. 31017
  2. 44364

Seurat¶

In [70]:
raw.count.full <- cbind(vcl.raw.count.full, exprMat.full)
In [71]:
dim(raw.count.full)
  1. 31017
  2. 52975
In [72]:
cellMeta <- rbind(vcl.cellMeta, cellMeta)
In [73]:
cellMeta$split <- paste(cellMeta$lab, cellMeta$stage, sep = "_")
In [74]:
rownames(cellMeta) <- cellMeta$cellName
In [75]:
head(cellMeta)
A data.frame: 6 × 4
stagecellNamelabsplit
<chr><chr><chr><chr>
vcl_CCTTCGACACCAACCGE13.5vcl_CCTTCGACACCAACCGEllyElly_E13.5
vcl_CTCTAATTCATAGCACE13.5vcl_CTCTAATTCATAGCACEllyElly_E13.5
vcl_CAGAGAGCAGCGAACAE13.5vcl_CAGAGAGCAGCGAACAEllyElly_E13.5
vcl_GCATGCGTCTGCCCTAE13.5vcl_GCATGCGTCTGCCCTAEllyElly_E13.5
vcl_AATCCAGGTCGAACAGE13.5vcl_AATCCAGGTCGAACAGEllyElly_E13.5
vcl_CATCAAGTCTGTCAAGE13.5vcl_CATCAAGTCTGTCAAGEllyElly_E13.5
In [89]:
integrated.cncc <- CreateSeuratObject(counts = raw.count.full, meta.data = cellMeta, project = "integrated.cncc", 
                              min.cells = 3, min.features = 200)
integrated.cncc
An object of class Seurat 
21064 features across 52784 samples within 1 assay 
Active assay: RNA (21064 features, 0 variable features)

QC¶

In [99]:
library('Matrix')
integrated.cncc[["percent.mt"]] <- PercentageFeatureSet(integrated.cncc, pattern = "^mt-")
VlnPlot(integrated.cncc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
In [102]:
integrated.cncc <- subset(integrated.cncc, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 20 & nCount_RNA < 75000)
In [103]:
# split the dataset into a list of two seurat objects (stim and CTRL)
cncc.list <- SplitObject(integrated.cncc, split.by = "split")
In [104]:
# normalize and identify variable features for each dataset independently
cncc.list <- lapply(X = cncc.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

add features¶

In [105]:
# marker gene provided by EMBO paper
known.markers.df <- read.table("marker.genes.txt", header = T)
In [106]:
known.markers <- unique(unlist(known.markers.df))
length(known.markers)
1242
In [107]:
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = cncc.list, nfeatures = 1000)
In [108]:
length(features)
1000
In [109]:
features <- unique(c(features, known.markers))
length(features)
1736
In [110]:
cncc.anchors <- FindIntegrationAnchors(object.list = cncc.list, anchor.features = features)
Scaling features for provided objects

Finding all pairwise anchors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 7124 anchors

Filtering anchors

	Retained 6643 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10503 anchors

Filtering anchors

	Retained 4066 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6376 anchors

Filtering anchors

	Retained 3093 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 12548 anchors

Filtering anchors

	Retained 2554 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6619 anchors

Filtering anchors

	Retained 2363 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 9239 anchors

Filtering anchors

	Retained 1479 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 12881 anchors

Filtering anchors

	Retained 2299 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6507 anchors

Filtering anchors

	Retained 2163 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 9334 anchors

Filtering anchors

	Retained 1865 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10139 anchors

Filtering anchors

	Retained 5831 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11865 anchors

Filtering anchors

	Retained 2821 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6197 anchors

Filtering anchors

	Retained 2469 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 8896 anchors

Filtering anchors

	Retained 1476 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 9138 anchors

Filtering anchors

	Retained 4967 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10269 anchors

Filtering anchors

	Retained 6821 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 14756 anchors

Filtering anchors

	Retained 4774 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6872 anchors

Filtering anchors

	Retained 3402 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 9926 anchors

Filtering anchors

	Retained 2413 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11328 anchors

Filtering anchors

	Retained 4986 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11400 anchors

Filtering anchors

	Retained 6031 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10121 anchors

Filtering anchors

	Retained 6220 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 15183 anchors

Filtering anchors

	Retained 3572 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6833 anchors

Filtering anchors

	Retained 2695 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10092 anchors

Filtering anchors

	Retained 2516 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11885 anchors

Filtering anchors

	Retained 3176 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11802 anchors

Filtering anchors

	Retained 3638 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10902 anchors

Filtering anchors

	Retained 3577 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 13543 anchors

Filtering anchors

	Retained 9261 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 14474 anchors

Filtering anchors

	Retained 6455 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 6708 anchors

Filtering anchors

	Retained 3932 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 9863 anchors

Filtering anchors

	Retained 4601 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 12746 anchors

Filtering anchors

	Retained 2319 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 12933 anchors

Filtering anchors

	Retained 2361 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 11880 anchors

Filtering anchors

	Retained 2930 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 13660 anchors

Filtering anchors

	Retained 6261 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 14657 anchors

Filtering anchors

	Retained 8394 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 17545 anchors

Filtering anchors

	Retained 4475 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 7682 anchors

Filtering anchors

	Retained 2807 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 10396 anchors

Filtering anchors

	Retained 5400 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 14121 anchors

Filtering anchors

	Retained 2786 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 14142 anchors

Filtering anchors

	Retained 2347 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 12851 anchors

Filtering anchors

	Retained 2751 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 16132 anchors

Filtering anchors

	Retained 5914 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 16842 anchors

Filtering anchors

	Retained 8232 anchors

Extracting within-dataset neighbors

Running CCA

Merging objects

Finding neighborhoods

Finding anchors

	Found 16567 anchors

Filtering anchors

	Retained 12166 anchors

Extracting within-dataset neighbors

In [111]:
# this command creates an 'integrated' data assay
cncc.combined <- IntegrateData(anchorset = cncc.anchors, features.to.integrate = features)
Merging dataset 2 into 1

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 9 into 10

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 3 into 10 9

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 6 into 5

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 8 into 7

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 4 into 5 6

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 7 8 into 10 9 3

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 1 2 into 10 9 3 7 8

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Merging dataset 5 6 4 into 10 9 3 7 8 1 2

Extracting anchors for merged samples

Finding integration vectors

Finding integration vector weights

Integrating data

Warning message:
“Not all features provided are in this Assay object, removing the following feature(s): 11-Sep, 7-Sep, 4-Sep”
Warning message:
“Adding a command log without an assay associated with it”
In [112]:
# specify that we will perform downstream analysis on the corrected data note that the
# original unmodified data still resides in the 'RNA' assay
DefaultAssay(cncc.combined) <- "integrated"
In [113]:
# Run the standard workflow for visualization and clustering
cncc.combined <- ScaleData(cncc.combined, verbose = FALSE)
In [184]:
# Regress out cell cycle scores during data scaling【not sure】
cncc.combined <- ScaleData(cncc.combined, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(cncc.combined))
Regressing out S.Score, G2M.Score

Centering and scaling data matrix

In [185]:
# cncc.combined <- RunPCA(cncc.combined, npcs = 30, verbose = FALSE, features = VariableFeatures(seuset))
cncc.combined <- RunPCA(cncc.combined, npcs = 30, verbose = F)
# cncc.combined <- RunTSNE(cncc.combined, reduction = "pca", dims = 1:30)
cncc.combined <- RunUMAP(cncc.combined, reduction = "pca", dims = 1:30)
18:22:20 UMAP embedding parameters a = 0.9922 b = 1.112

18:22:20 Read 51674 rows and found 30 numeric columns

18:22:20 Using Annoy for neighbor search, n_neighbors = 30

18:22:20 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

|

18:22:31 Writing NN index file to temp file /tmp/pbs.1145068.omics/RtmpEekIE7/file2e33c7d02cb55

18:22:31 Searching Annoy index using 1 thread, search_k = 3000

18:22:55 Annoy recall = 100%

18:22:56 Commencing smooth kNN distance calibration using 1 thread

18:22:59 Initializing from normalized Laplacian + noise

18:23:04 Commencing optimization for 200 epochs, with 2421742 positive edges

18:24:14 Optimization finished

In [186]:
cncc.combined <- FindNeighbors(cncc.combined, reduction = "pca", dims = 1:30)
cncc.combined <- FindClusters(cncc.combined, resolution = 0.7)
Computing nearest neighbor graph

Computing SNN

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 51674
Number of edges: 2159422

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8849
Number of communities: 21
Elapsed time: 18 seconds
In [187]:
cncc.combined <- CellCycleScoring(cncc.combined, s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes, set.ident = F)
Warning message:
“The following features are not present in the object: MCM5, PCNA, TYMS, FEN1, MCM2, MCM4, RRM1, UNG, GINS2, MCM6, CDCA7, DTL, PRIM1, UHRF1, MLF1IP, HELLS, RFC2, RPA2, NASP, RAD51AP1, GMNN, WDR76, SLBP, CCNE2, UBR7, POLD3, MSH2, ATAD2, RAD51, RRM2, CDC45, CDC6, EXO1, TIPIN, DSCC1, BLM, CASP8AP2, USP1, CLSPN, POLA1, CHAF1B, BRIP1, E2F8, not searching for symbol synonyms”
Warning message:
“The following features are not present in the object: HMGB2, CDK1, NUSAP1, UBE2C, BIRC5, TPX2, TOP2A, NDC80, CKS2, NUF2, CKS1B, MKI67, TMPO, CENPF, TACC3, FAM64A, SMC4, CCNB2, CKAP2L, CKAP2, AURKB, BUB1, KIF11, ANP32E, TUBB4B, GTSE1, KIF20B, HJURP, CDCA3, HN1, CDC20, TTK, CDC25C, KIF2C, RANGAP1, NCAPD2, DLGAP5, CDCA2, CDCA8, ECT2, KIF23, HMMR, AURKA, PSRC1, ANLN, LBR, CKAP5, CENPE, CTCF, NEK2, G2E3, GAS2L3, CBX5, CENPA, not searching for symbol synonyms”
Warning message in AddModuleScore(object = object, features = features, name = name, :
“Could not find enough features in the object from the following feature lists: S.Score Attempting to match case...Could not find enough features in the object from the following feature lists: G2M.Score Attempting to match case...”
In [188]:
table(cncc.combined$Phase)
   G1   G2M     S 
25488  9656 16530 
In [189]:
table(cncc.combined@active.ident)
   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
7789 4958 4423 4080 3861 3811 3797 3565 2694 2359 1974 1753 1600 1419 1157 1152 
  16   17   18   19   20 
 380  375  263  176   88 
In [190]:
# cncc.combined$predict
In [191]:
DefaultAssay(cncc.combined)
'integrated'
In [192]:
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
                     "Spon1","Lmod1","Cav1",
                     "Actg2","Btg2","Sost",
                     "Dlk1","Tshz2","Igfbp5",
                     "Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
                     "Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
                     "Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
                     "Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
                    )
In [193]:
markers.to.plot[duplicated(markers.to.plot)]
In [194]:
cncc.combined$raw_cluster <- cncc.combined@active.ident
In [195]:
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.4) +RotatedAxis()
Warning message:
“Removed 170 rows containing missing values (geom_point).”
In [196]:
markers.to.plot <- c("Myh11","Cxcl12","Pdgfra","Lum","Gfra3","Cnp","Th","Slc18a3","P2ry14","Vtn","Mlana","Dct")
In [197]:
tmp <- c("P2ry14","Vtn")
In [198]:
subset(gene.anno.mm10.v3, gene_name %in% tmp)
A data.frame: 2 × 6
chrstartendgene_idgene_namegene_biotype
<chr><int><int><chr><chr><chr>
55263 5911385559153618ENSMUSG00000036381P2ry14protein_coding
21844117849909178502324ENSMUSG00000017344Vtn protein_coding
In [199]:
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, assay = "RNA", dot.scale = 6) +RotatedAxis()
In [200]:
# options(repr.plot.width=8, repr.plot.height=4)
# p1 <- DimPlot(cncc.combined, reduction = "umap", group.by = "split")
# p2 <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
# p1 + p2
In [201]:
# options(repr.plot.width=8, repr.plot.height=4)
# p1 <- DimPlot(cncc.combined, reduction = "umap", group.by = "split")
# p2 <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
# p1 + p2
In [202]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
In [203]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "Phase")
In [338]:
# cncc.combined$unsuper_cluster <- cncc.combined@active.ident
In [339]:
# cncc.combined$manual_anno <- plyr::mapvalues(cncc.combined@active.ident, 
#                                              from = 0:16, to = c("0c6","1c5","2c4","3c1","4c4","5c1","6c1","7c5", #0-7
#                                                                 "8c7","9c1","10c2","11c3","12c8","13c5","14c2","15c9","16c10"))
In [340]:
# cncc.combined$manual_anno <- plyr::mapvalues(cncc.combined@active.ident, 
#                                              from = 0:16, to = c("c6","c5","c4","c1","c4","c1","c1","c5", #0-7
#                                                                 "c7","c1","c2","c3","c8","c5","c2","c9","c10"))
In [341]:
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "manual_anno")
In [102]:
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "tsne", label = TRUE, repel = TRUE, group.by = "manual_anno")
In [342]:
# table(cncc.combined@active.ident, cncc.combined$annotation)
In [204]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", group.by = "ident", split.by = "split", ncol = 3)
In [136]:
# tmp <- rep("unknown", length.out = length(cncc.combined@active.ident))
# names(tmp) <- names(cncc.combined@active.ident)
# tmp[names(seuset@active.ident)] <- as.character(seuset@active.ident)
# tmp <- factor(tmp)
# head(tmp)
# cncc.combined$annotation <- tmp
In [137]:
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", group.by = "annotation", split.by = "split", ncol = 3)
In [346]:
# using known genes
save(cncc.combined, file = "supervised.cncc.combined.elly.zhou.Rdata")
In [138]:
# using known genes
# add all cells and E14.0
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.Rdata")
In [205]:
# using known genes
# add all cells and E14.0
# regression out cell cycle
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.removeCC.Rdata")

query¶

In [347]:
length(VariableFeatures(seuset))
length(VariableFeatures(integrated.cncc))
length(features)
1773
0
1750
In [348]:
cncc.query.anchors <- FindTransferAnchors(reference = seuset, query = integrated.cncc, dims = 1:30, 
                                          features = unique(c(VariableFeatures(seuset))))
Performing PCA on the provided reference using 1773 features as input.

Projecting PCA

Finding neighborhoods

Finding anchors

	Found 12449 anchors

Filtering anchors

	Retained 7059 anchors

Extracting within-dataset neighbors

In [349]:
seuset$celltype <- seuset@active.ident
In [350]:
predictions <- TransferData(anchorset = cncc.query.anchors, refdata = seuset$celltype,dims = 1:30)
Finding integration vectors

Finding integration vector weights

Predicting cell labels

In [351]:
integrated.cncc <- AddMetaData(integrated.cncc, metadata = predictions)
In [352]:
tmp <- rep("unknown", length.out = length(integrated.cncc@active.ident))
names(tmp) <- names(integrated.cncc@active.ident)
tmp[names(seuset@active.ident)] <- as.character(seuset@active.ident)
tmp <- factor(tmp)
head(tmp)
integrated.cncc$annotation <- tmp
vcl_CCTTCGACACCAACCG
c5
vcl_CTCTAATTCATAGCAC
c4
vcl_CAGAGAGCAGCGAACA
c2
vcl_AATCCAGGTCGAACAG
c5
vcl_CATCAAGTCTGTCAAG
c4
vcl_CAGAGAGCAGCTGGCT
c5
Levels:
  1. 'c1'
  2. 'c2'
  3. 'c3'
  4. 'c4'
  5. 'c5'
  6. 'c6'
  7. 'unknown'
In [353]:
table(integrated.cncc$predicted.id, integrated.cncc$annotation)
    
        c1    c2    c3    c4    c5    c6 unknown
  c1   792     2     0    40    43    28   11977
  c2     2   424     3     8     0     1    2300
  c3     2    16   484    15     1     0     302
  c4    21     8     5  2544    39     2    4649
  c5     9     5     2    88  1549   111    5215
  c6     1     0     0     3     2   143   18814
In [354]:
# save
tmp <- integrated.cncc$predicted.id[colnames(cncc.combined)]
In [355]:
cncc.combined$predict <- integrated.cncc$predicted.id[colnames(cncc.combined)]
In [356]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
In [357]:
# using known genes
save(cncc.combined, file = "supervised.cncc.combined.elly.zhou.mapped.Rdata")
In [587]:
# add E14.0
# using known genes
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.mapped.Rdata")
In [ ]:

filter main cells¶

In [9]:
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "predict") +
        geom_vline(xintercept = -7) +
        geom_hline(yintercept = 5.4)
# p
In [388]:
all.umap <- p$data
In [390]:
all.umap <- subset(all.umap, UMAP_1> -7 & UMAP_2 < 5.4)
In [391]:
plot(all.umap$UMAP_1, all.umap$UMAP_2)
In [392]:
all.umap <- subset(all.umap, !predict %in% c("c2","c3"))
In [393]:
head(all.umap)
A data.frame: 6 × 4
UMAP_1UMAP_2identpredict
<dbl><dbl><fct><chr>
vcl_CCTTCGACACCAACCG-2.612578-5.03831820c5
vcl_CTCTAATTCATAGCAC-2.160392-5.51886040c4
vcl_AATCCAGGTCGAACAG 3.421771 0.84632583c5
vcl_CATCAAGTCTGTCAAG-4.815358-2.04712766c4
vcl_CAGAGAGCAGCTGGCT 4.433164 0.19952363c5
vcl_ACGGGTCTCAACACGT-2.521075-4.34851370c4
In [394]:
save(all.umap, file = "all.umap.Rdata")
In [ ]:

In [142]:
ggsave(filename = "integrated.CNCC.8.stages.pdf", width = 8, height = 7)
In [150]:
options(repr.plot.width=8, repr.plot.height=7)
DimPlot(cncc.combined, reduction = "tsne", label = TRUE, repel = TRUE, group.by = "predict")
In [ ]:

In [156]:
options(repr.plot.width=8, repr.plot.height=8)
VlnPlot(cncc.combined, c("Ube2c", "Cdc20", "Sox10", "Fabp7", "Phox2a", "Npy", "Col2a1","Cxcl12","Pdgfra","Lum","Acta2","Tbx20"), pt.size = 0,
        group.by = "predict")
In [157]:
ggsave(filename = "integrated.CNCC.celltype.marker.pdf", width = 8, height = 8)
In [145]:
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
In [146]:
ggsave(filename = "integrated.CNCC.split.stages.pdf", width = 9, height = 9)
In [159]:
options(repr.plot.width=7, repr.plot.height=9)
FeaturePlot(cncc.combined, features = c("Lum", "Pdgfra", "Myh11", "Cxcl12", "Des", "Dct"), min.cutoff = "q8", pt.size = 0.1)
In [160]:
ggsave(filename = "integrated.CNCC.key.markers.pdf", width = 7, height = 9)
In [ ]:
# DotPlot(cncc.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "stim") +
#     RotatedAxis()
In [131]:
save(cncc.combined, file = "cncc.combined.elly.zhou.Rdata")

load¶

In [66]:
# print(load("keyRdata/supervised.cncc.combined.elly.zhou.mapped.Rdata"))
In [68]:
print(load("keyRdata/all.supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined"
In [130]:
dim(cncc.combined)
  1. 1733
  2. 51674
In [131]:
table(cncc.combined$split)
Elly_E13.5 Elly_E14.0 zhou_E10.5 zhou_E11.5 zhou_E12.5 zhou_E13.5 zhou_E14.5 
      6564       1991       2806       7763       6178       6014       6682 
zhou_E17.5    zhou_P1    zhou_P7 
      4341       4560       4775 
In [132]:
6564+1991
8555
In [102]:
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"     

QC¶

In [72]:
source("https://raw.githubusercontent.com/satijalab/seurat/master/R/utilities.R")
PercentageFeatureSet <- function( 
   object,  
   pattern = NULL,  
   features = NULL,  
   col.name = NULL, 
   assay = NULL 
 ) { 
   library('Matrix')
   assay <- assay %||% DefaultAssay(object = object) 
   if (!is.null(x = features) && !is.null(x = pattern)) { 
     warning("Both pattern and features provided. Pattern is being ignored.") 
   } 
   # features <- features %||% grep(pattern = pattern, x = rownames(x = object[[assay]]), value = TRUE) 
    features <- features %||% grep(pattern = pattern, x = rownames(x = object@assays$RNA@counts), value = TRUE) 
   # print(head(features))
   #percent.featureset <- Matrix::colSums(x = GetAssayData(object = object, slot = "counts"))[features, ]/object[[paste0("nCount_", assay)]] * 100 
   percent.featureset <- Matrix::colSums(x =object@assays$RNA@counts[features, ])/object[[paste0("nCount_", assay)]] * 100 
   if (!is.null(x = col.name)) { 
     object <- AddMetaData(object = object, metadata = percent.featureset, col.name = col.name) 
     return(object) 
   } 
   return(percent.featureset) 
 }
In [73]:
library('Matrix')
cncc.combined[["percent.mt"]] <- PercentageFeatureSet(cncc.combined, pattern = "mt-", assay = 'RNA')
In [124]:
tmp.group <- cncc.combined$split
tmp.group[grep("ct1", names(tmp.group))] <- "E14.0 Control"
tmp.group[grep("ct2", names(tmp.group))] <- "E13.5 Control"
tmp.group[grep("vcl", names(tmp.group))] <- "E13.5 Vcl cKO"
tmp.group <- stringr::str_replace(tmp.group, pattern = "zhou_", replacement = "Public ")
cncc.combined$split2 <- tmp.group
In [134]:
table(cncc.combined$split2)
1623+4941
E13.5 Control E13.5 Vcl cKO E14.0 Control  Public E10.5  Public E11.5 
         1623          4941          1991          2806          7763 
 Public E12.5  Public E13.5  Public E14.5  Public E17.5     Public P1 
         6178          6014          6682          4341          4560 
    Public P7 
         4775 
6564
In [125]:
options(repr.plot.width=12, repr.plot.height=6)
VlnPlot(cncc.combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0, group.by = "split2") 
# + geom_boxplot(width=0.1, outlier.size = 0)
In [126]:
ggsave(filename = "Figures/QC.pdf", width = 12, height = 6)
In [127]:
tmp.group <- cncc.combined.sub$split
tmp.group[grep("ct1", names(tmp.group))] <- "E14.0 Control"
tmp.group[grep("ct2", names(tmp.group))] <- "E13.5 Control"
tmp.group[grep("vcl", names(tmp.group))] <- "E13.5 Vcl cKO"
tmp.group <- stringr::str_replace(tmp.group, pattern = "zhou", replacement = "Chen_et_al")
cncc.combined.sub$split2 <- tmp.group
In [128]:
options(repr.plot.width=10, repr.plot.height=10)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = F, repel = F, label.size = 5, cols = myColors,
              split.by = "split2", ncol = 3)
p1
In [129]:
ggsave(filename = "Figures/public_integration.pdf", width = 10, height = 10)

order by connectitvy¶

In [141]:
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
In [539]:
table(cncc.combined@active.ident)
   4    3   13    1    8   10   12    2    5    6    7   14   11   17   19   16 
3861 4080 1419 4958 2694 1974 1600 4423 3811 3797 3565 1157 1753  375  176  380 
  18   20    9   15 
 263   88 2359 1152 
In [541]:
cncc.combined$raw_cluster[is.na(cncc.combined$raw_cluster)] <- 0
In [548]:
cncc.combined$raw_cluster <- factor(cncc.combined$raw_cluster, 
                                     levels = as.character(c(4,3,1,13,
                                                8,6,0,10,12,2,5,7,
                                                14,11,
                                                17,
                                                19,16,18,20,
                                                9,15)))
In [549]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "raw_cluster")
In [550]:
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3, 
              group.by = "raw_cluster") + RotatedAxis()
Warning message:
“Removed 81 rows containing missing values (geom_point).”
In [551]:
cncc.combined$ordered_raw_cluster <- plyr::mapvalues(cncc.combined$raw_cluster, 
                       from = c(4,3,1,13,
                                8,6,0,10,12,2,5,7,
                                14,11,
                                17,
                                19,16,18,20,
                                9,15), 
                         to = 1:21)
In [552]:
cncc.combined@active.ident <- cncc.combined$ordered_raw_cluster
In [553]:
cncc.combined@active.ident <- factor(cncc.combined@active.ident, levels = 1:21)

core plots¶

In [554]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
In [509]:
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "Phase")
In [510]:
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
                     "Spon1","Lmod1","Cav1",
                     "Actg2","Btg2","Sost",
                     "Dlk1","Tshz2","Igfbp5",
                     "Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
                     "Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
                     "Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
                     "Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
                    )
In [511]:
markers.to.plot <- c(# "Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
                     "Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
                     "Acta2","Cxcl12","Rgs5","Myh11", # VSMC
                     "Phox2a","Gap43","Tubb3","Th", # Neuron
                     "Fabp7","Plp1","Ednrb","Sox10", # Glia
                     "Dct","Pmel","Gpnmb", # MLA
                     "Des","Cnn1"#,
                     #"Col2a1",
                     #"Crabp1","Crabp2"
                    )
In [558]:
options(repr.plot.width=12, repr.plot.height=7)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + 
    RotatedAxis()
Warning message:
“Removed 81 rows containing missing values (geom_point).”
In [559]:
table(cncc.combined$stage)
E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5    P1    P7 
 2806  7763  6178 12578  1991  6682  4341  4560  4775 
In [560]:
cncc.combined.sub <- subset(cncc.combined, subset = ordered_raw_cluster %in% c(1:15,17:18))
In [561]:
cncc.combined.sub$ordered_raw_cluster <- plyr::mapvalues(cncc.combined.sub@active.ident, 
                       from = c(1:15,17:18), 
                         to = 1:17)
In [562]:
cncc.combined.sub@active.ident <- cncc.combined.sub$ordered_raw_cluster
In [563]:
cncc.combined.sub@active.ident <- factor(cncc.combined.sub@active.ident, levels = 1:17)
In [564]:
# options(repr.plot.width=14, repr.plot.height=8)
# DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
In [565]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE)
In [566]:
options(repr.plot.width=7, repr.plot.height=9)
FeaturePlot(cncc.combined.sub, features = c("Lum", "Pdgfra", "Myh11", "Cxcl12", "Des", "Dct"), min.cutoff = "q8", pt.size = 0.1)
In [567]:
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
                     "Pdgfra","Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn","Tcf21","Penk","Sfrp2", # MES
                     "Acta2","Cxcl12","Rgs5","Myh11","Eln","Fbln2", # VSMC
                     "Phox2a","Gap43","Tubb3","Th", # Neuron
                     "Fabp7","Plp1","Ednrb","Sox10", # Glia
                     "Dct","Pmel","Gpnmb","Pax3","Kit", # MLA
                     "Des","Cnn1",
                     #"Col2a1",
                     "Crabp1","Crabp2"
                    )
In [568]:
options(repr.plot.width=12, repr.plot.height=6)
DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
Warning message:
“Removed 54 rows containing missing values (geom_point).”
In [573]:
cncc.combined.sub$ordered_raw_cluster_final <- cncc.combined.sub@active.ident
In [577]:
cncc.combined.sub$EMBO_anno <- plyr::mapvalues(cncc.combined.sub$ordered_raw_cluster_final, 
                       from = 1:17, 
                         to = c("MES-1","MES-2","MES-3","MES-4",
                                "VSMC-1","VSMC-2","VSMC-3","VSMC-4","VSMC-5","VSMC-6","VSMC-7","VSMC-8",
                                "Neuron","SWN","MLA","Mural","Mural"))
In [578]:
cncc.combined.sub@active.ident <- cncc.combined.sub$EMBO_anno
In [579]:
options(repr.plot.width=12, repr.plot.height=6)
DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
Warning message:
“Removed 45 rows containing missing values (geom_point).”

UMAP¶

In [1]:
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"     
In [8]:
table(cncc.combined.sub@active.ident)
 MES-1  MES-2  MES-3  MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7 
  3861   4080   4958   1419   2694   3797   7789   1974   1600   4423   3811 
VSMC-8 Neuron    SWN    MLA  Mural 
  3565   1157   1753    375    643 
In [9]:
table(cncc.combined.sub$stage)
E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5    P1    P7 
 2268  6971  5922 12341  1957  6092  3777  4204  4367 
In [6]:
options(repr.plot.width=8, repr.plot.height=6)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, label.size = 5, cols = myColors)
p1
In [457]:
options(repr.plot.width=8, repr.plot.height=6)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, label.size = 5, cols = myColors)
p1
In [46]:
ggsave(filename = "Figures/integrated.UMAP.pdf", width = 8, height = 6)

Staining¶

In [16]:
options(repr.plot.width=14, repr.plot.height=5)
FeaturePlot(cncc.combined.sub, ncol = 5, min.cutoff = "q8", pt.size = 0.2,
            features = c("Lum","Pdgfra","Cxcl12","Myh11","Phox2a","Th","Sox10","Fabp7","Des","Dct"))
In [17]:
ggsave(filename = "Figures/integrated.marker.staining.pdf", width = 14, height = 5)
In [474]:
options(repr.plot.width=9, repr.plot.height=5)
FeaturePlot(cncc.combined.sub, ncol = 3, min.cutoff = "q8", pt.size = 0.2, slot = "data",
            features = c('Acta2','Tagln','Eln','Fbln5','Mgp','Mylpf'))
Warning message:
“Could not find Mylpf in the default search locations, found in RNA assay instead”
In [475]:
ggsave(filename = "Figures/integrated.marker.staining2.pdf", width = 9, height = 5)
In [ ]:

In [9]:
getwd()
'/home/lizhixin/project/Data_center/analysis'
In [723]:
cncc.markers <- FindAllMarkers(cncc.combined.sub, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster MES-1

Calculating cluster MES-2

Calculating cluster MES-3

Calculating cluster MES-4

Calculating cluster VSMC-1

Calculating cluster VSMC-2

Calculating cluster VSMC-3

Calculating cluster VSMC-4

Calculating cluster VSMC-5

Calculating cluster VSMC-6

Calculating cluster VSMC-7

Calculating cluster VSMC-8

Calculating cluster Neuron

Calculating cluster SWN

Calculating cluster MLA

Calculating cluster Mural

In [13]:
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"     

novel markers¶

In [24]:
length(unique(cncc.markers$cluster))
16
In [4]:
table(cncc.markers$cluster)
 MES-1  MES-2  MES-3  MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7 
   168     74    186    123     80    160     61     65     35     46     64 
VSMC-8 Neuron    SWN    MLA  Mural 
    50    189    181    244    196 
In [6]:
cncc.markers$`pct.1` <- NULL
cncc.markers$`pct.2` <- NULL
cncc.markers$gene <- NULL
In [7]:
write.csv(cncc.markers, file = "CNCC.16.cluster.csv")
In [724]:
save(cncc.combined.sub, cncc.markers, file = "keyRdata/all.cncc.combined.EMBO.mapped.Rdata")

heatmap¶

In [8]:
head(cncc.markers)
A data.frame: 6 × 4
p_valavg_logFCp_val_adjcluster
<dbl><dbl><dbl><fct>
Tagln201.03126720MES-1
Pdlim300.99658140MES-1
Cdk100.98079710MES-1
Ube2c00.96071070MES-1
Hapln100.92166730MES-1
Hist1h2ap00.89168960MES-1
In [15]:
cncc.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_logFC) -> top10
In [17]:
head(top10)
A grouped_df: 6 × 7
p_valavg_logFCpct.1pct.2p_val_adjclustergene
<dbl><dbl><dbl><dbl><dbl><fct><chr>
01.03126721.0000.9670MES-1Tagln2
00.99658141.0000.9670MES-1Pdlim3
00.98079710.9960.9130MES-1Cdk1
00.96071070.9740.9420MES-1Ube2c
00.92166730.9410.8230MES-1Hapln1
00.89168960.9790.9140MES-1Hist1h2ap
In [20]:
table(cncc.combined.sub@active.ident)
 MES-1  MES-2  MES-3  MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7 
  3861   4080   4958   1419   2694   3797   7789   1974   1600   4423   3811 
VSMC-8 Neuron    SWN    MLA  Mural 
  3565   1157   1753    375    643 
In [28]:
options(repr.plot.width=20, repr.plot.height=20)
p <- DoHeatmap(cncc.combined.sub, features = top10$gene, combine = T, angle = 90) + NoLegend()
In [29]:
pdf("Figures/16.cluster.heatmap.pdf", width=15, height=15)
p
dev.off()
png: 2

violin¶

In [18]:

In [19]:
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
                     "Pdgfra","Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn","Tcf21","Penk","Sfrp2", # MES
                     "Acta2","Cxcl12","Rgs5","Myh11","Eln","Fbln2", # VSMC
                     "Phox2a","Gap43","Tubb3","Th", # Neuron
                     "Fabp7","Plp1","Ednrb","Sox10", # Glia
                     "Dct","Pmel","Gpnmb","Pax3","Kit", # MLA
                     "Des","Cnn1", # Mural
                     #"Col2a1",
                     "Crabp1","Crabp2"
                    )
In [23]:
features <- c("Myh11","Cxcl12","Pdgfra","Lum","Phox2a","Th","Slc18a3","Sox10","Fabp7","Mlana","Dct","Des","Cnn1")
In [47]:
options(repr.plot.width=8, repr.plot.height=10)
StackedVlnPlot(obj = cncc.combined.sub, features = features, cols = myColors)
Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

In [48]:
ggsave(filename = "Figures/integrated.marker.violin.pdf", width = 8, height = 10)
In [ ]:

known gene module score¶

In [569]:
# marker gene provided by EMBO paper
known.markers.df <- read.table("marker.genes.txt", header = T)
In [570]:
for (i in colnames(known.markers.df)) {
    tmp.module <- known.markers.df[,i]
    tmp.module <- tmp.module[tmp.module %in% rownames(cncc.combined.sub@assays$integrated@scale.data)]
    cncc.combined[[i]] <- colMeans(cncc.combined.sub@assays$integrated@scale.data[tmp.module,])
}
In [571]:
options(repr.plot.width=12, repr.plot.height=20)
VlnPlot(cncc.combined.sub, features = colnames(known.markers.df), ncol = 3, pt.size = 0)

annotate to EMBO¶

In [23]:
table(cncc.combined$raw_cluster, cncc.combined$Phase)
    
       G1  G2M    S
  0  3363   25 2230
  1  4509    0  156
  2  4325    2  269
  3  3615   11  740
  4    27 2045 2073
  5     1 1651 2354
  6   673  447 2049
  7     8 1729 1430
  8  2453    1  321
  9  1529  477  450
  10  728  987  114
  11  106  603  954
  12 1385   53  158
  13  644  636   66
  14 1054    0   73
  15  295  360  444
  16  505   77  225
  17   59  276   66
  18   11  110  237
  19    5  144   60
  20   43   22  103
  21   22   16   46
In [36]:
cncc.combined$EMBO <- plyr::mapvalues(cncc.combined$raw_cluster, 
                       from = c(4,5,7,
                                13,3,1,
                                6,10,0, 2,8,14,12,
                                11,16,18,
                                17,19,
                                9,15,20,21), 
                         to = c("CNCC-1","CNCC-2","CNCC-3",
                                "MES-1","MES-2","MES-3",
                                "VSMC-1","VSMC-2","VSMC-3", "VSMC-4","VSMC-5","VSMC-6","VSMC-7",
                                "SWN","Neuron","MLA",
                                "Mural","Mural",
                                "Unknown","Unknown","Unknown","Unknown"))
In [11]:
cncc.combined$EMBO <- factor(cncc.combined$EMBO, levels = c("CNCC-1","CNCC-2","CNCC-3",
                                "MES-1","MES-2","MES-3",
                                "VSMC-1","VSMC-2","VSMC-3", "VSMC-4","VSMC-5","VSMC-6","VSMC-7",
                                "SWN","Neuron","MLA",
                                "Mural","Unknown"))
In [37]:
cncc.combined$EMBO2 <- plyr::mapvalues(cncc.combined$raw_cluster, 
                       from = c(4,5,7,
                                13,3,1,
                                6,10,0, 2,8,14,12,
                                11,16,18,
                                17,19,9,15,20,21), 
                         to = c("CNCC","CNCC","CNCC",
                                "MES","MES","MES",
                                "VSMC","VSMC","VSMC", "VSMC","VSMC","VSMC","VSMC",
                                "SWN","Neuron","MLA",
                                "Mural","Mural",
                                "Unknown","Unknown","Unknown","Unknown"))
In [227]:
cncc.combined$EMBO2 <- factor(cncc.combined$EMBO2, levels = c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
In [38]:
# cncc.combined@active.ident <- cncc.combined$EMBO
In [228]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO2")
In [238]:
cncc.combined$split <- factor(cncc.combined$split, levels = c("zhou_E10.5","zhou_E11.5","zhou_E12.5","zhou_E13.5","Elly_E13.5",
                                                              "zhou_E14.5","zhou_E17.5","zhou_P1","zhou_P7"))
In [239]:
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(cncc.combined, reduction = "umap", group.by = "EMBO2", split.by = "split", ncol = 3)
In [230]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO")
In [52]:
cncc.combined@active.ident <- cncc.combined$EMBO2
In [53]:
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
                     "Spon1","Lmod1","Cav1",
                     "Actg2","Btg2","Sost",
                     "Dlk1","Tshz2","Igfbp5",
                     "Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
                     "Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
                     "Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
                     "Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
                    )
In [54]:
table(cncc.combined@active.ident)
   VSMC     MES    CNCC Unknown     SWN  Neuron   Mural     MLA 
  20710   10377   11318    3807    1663     807     610     358 
In [223]:
cncc.combined@active.ident <- factor(cncc.combined@active.ident, 
                                     levels = rev(c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown")))
In [224]:
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC
                     "Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
                     "Acta2","Cxcl12","Rgs5","Myh11", # VSMC
                     "Phox2a","Gap43","Tubb3","Th", # Neuron
                     "Fabp7","Plp1","Ednrb","Sox10", # Glia
                     "Dct","Pmel","Gpnmb", # MLA
                     "Des","Cnn1",
                     "Col2a1"
                    )
In [225]:
options(repr.plot.width=11, repr.plot.height=5)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message:
“Removed 39 rows containing missing values (geom_point).”
In [73]:
cncc.combined@active.ident <- factor(cncc.combined@active.ident, 
                                     levels = c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
In [74]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
In [232]:
dim(cncc.combined@assays$RNA@counts)
  1. 16692
  2. 49650

novel markers¶

In [75]:
cncc.markers <- FindAllMarkers(cncc.combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster CNCC

Calculating cluster MES

Calculating cluster VSMC

Calculating cluster Neuron

Calculating cluster SWN

Calculating cluster MLA

Calculating cluster Mural

Calculating cluster Unknown

In [84]:
library(dplyr)
In [91]:
# install.packages("dplyr")
In [89]:
packageVersion("dplyr")
[1] ‘0.8.5’
In [97]:
cncc.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_logFC) -> top10
In [98]:
pdf("Figures/lineage.markers.heatmap.new.clusters.pdf", width=12, height=10)
DoHeatmap(cncc.combined, features = top10$gene) + NoLegend()
dev.off()
png: 2
In [99]:
# using known genes
save(cncc.combined, cncc.markers, file = "supervised.cncc.combined.elly.zhou.mapped.Rdata")
In [44]:
getwd()
'/home/lizhixin/project/Data_center/analysis'

load¶

In [4]:
print(load("supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined" "cncc.markers" 
In [5]:
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC
                     "Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
                     "Acta2","Cxcl12","Rgs5","Myh11", # VSMC
                     "Phox2a","Gap43","Tubb3","Th", # Neuron
                     "Fabp7","Plp1","Ednrb","Sox10", # Glia
                     "Dct","Pmel","Gpnmb", # MLA
                     "Des","Cnn1",
                     "Col2a1"
                    )
In [8]:
options(repr.plot.width=11, repr.plot.height=5)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message:
“Removed 39 rows containing missing values (geom_point).”
In [12]:
cncc.combined@active.ident <- cncc.combined$EMBO
In [14]:
options(repr.plot.width=11, repr.plot.height=6)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message:
“Removed 100 rows containing missing values (geom_point).”
In [33]:
cncc.combined$EMBO3 <- plyr::mapvalues(cncc.combined$raw_cluster, 
                       from = c(4,5,7,
                                13,3,1,
                                6,10,0, 2,8,14,12,
                                11,16,18,
                                17,19,
                                9,15,20,21), 
                         to = c("MES-1","VSMC-2","VSMC-1",
                                "MES-2","MES-3","MES-4",
                                "VSMC-3","VSMC-4","VSMC-5", "VSMC-6","VSMC-7","VSMC-8","VSMC-9",
                                "SWN","Neuron","MLA",
                                "Mural","Mural",
                                "Unknown","Unknown","Unknown","Unknown"))
In [34]:
cncc.combined$EMBO3 <- factor(cncc.combined$EMBO3, levels = c("MES-1","MES-2","MES-3","MES-4",
                                "VSMC-1","VSMC-2","VSMC-3","VSMC-4", "VSMC-5","VSMC-6","VSMC-7","VSMC-8","VSMC-9",
                                "SWN","Neuron","MLA",
                                "Mural","Unknown"))
In [35]:
cncc.combined@active.ident <- cncc.combined$EMBO3
In [36]:
options(repr.plot.width=11, repr.plot.height=6)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message:
“Removed 100 rows containing missing values (geom_point).”
In [37]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO3")
In [38]:
cncc.combined$EMBO4 <- plyr::mapvalues(cncc.combined$raw_cluster, 
                       from = c(4,5,7,
                                13,3,1,
                                6,10,0, 2,8,14,12,
                                11,16,18,
                                17,19,
                                9,15,20,21), 
                         to = c("MES","VSMC","VSMC",
                                "MES","MES","MES",
                                "VSMC","VSMC","VSMC", "VSMC","VSMC","VSMC","VSMC",
                                "SWN","Neuron","MLA",
                                "Mural","Mural",
                                "Unknown","Unknown","Unknown","Unknown"))
In [39]:
cncc.combined$EMBO4 <- factor(cncc.combined$EMBO4, levels = c("MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
In [40]:
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO4")
In [41]:
cncc.combined@active.ident <- cncc.combined$EMBO4
In [43]:
options(repr.plot.width=11, repr.plot.height=4)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message:
“Removed 34 rows containing missing values (geom_point).”
In [ ]:

In [ ]:

Cladogram¶

In [9]:
print(load("all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"     
In [26]:
# API
groups <- cncc.combined.sub@active.ident
exprM <- cncc.combined.sub@assays$integrated@scale.data[,names(groups)]
In [27]:
names(table(groups))
  1. 'MES-1'
  2. 'MES-2'
  3. 'MES-3'
  4. 'MES-4'
  5. 'VSMC-1'
  6. 'VSMC-2'
  7. 'VSMC-3'
  8. 'VSMC-4'
  9. 'VSMC-5'
  10. 'VSMC-6'
  11. 'VSMC-7'
  12. 'VSMC-8'
  13. 'Neuron'
  14. 'SWN'
  15. 'MLA'
  16. 'Mural'
In [28]:
merged.exprM <- data.frame()
# samples <- c("IMR_ENCC", "UE_ENCC", "HSCR_5c3","HSCR_20c7","HSCR_10c2","HSCR_1c11","HSCR_17c8","HSCR_23c9")
samples <- unique(groups)

for (i in samples) {
    print(i)
    tmp.exprM <- exprM[,groups==i]
    tmp.exprM.merge <- rowMeans(tmp.exprM)
    merged.exprM <- rbind(merged.exprM, tmp.exprM.merge)
    # break
}
#
merged.exprM <- as.data.frame(t(merged.exprM))
colnames(merged.exprM) <- samples
rownames(merged.exprM) <- names(tmp.exprM.merge)
[1] "VSMC-7"
[1] "SWN"
[1] "Neuron"
[1] "MES-2"
[1] "VSMC-8"
[1] "VSMC-6"
[1] "MES-3"
[1] "VSMC-3"
[1] "MES-4"
[1] "VSMC-2"
[1] "VSMC-1"
[1] "VSMC-4"
[1] "VSMC-5"
[1] "Mural"
[1] "MES-1"
[1] "MLA"
In [29]:
# devtools::install_version("phangorn", version = "2.5.5", repos = "http://cran.us.r-project.org")

# # install.packages('devtools')
# devtools::install_github("jingwyang/TreeExp", dependencies = F)
In [30]:
library(TreeExp)
disM <- dist.euc(merged.exprM[,])
dim(disM)
Attaching package: ‘TreeExp’


The following object is masked from ‘package:base’:

    print


  1. 16
  2. 16
In [31]:
rownames(disM) <- colnames(merged.exprM)
colnames(disM) <- colnames(merged.exprM)
In [32]:
# # cluster order
# disM <- disM[,as.character(1:17)]
In [33]:
disM[is.na(disM)] <- 0

tr <- NJ(disM)
tr$tip.label

tr <- ape::root(tr, outgroup = "MLA")

library("ggtree")
  1. 'VSMC-7'
  2. 'SWN'
  3. 'Neuron'
  4. 'MES-2'
  5. 'VSMC-8'
  6. 'VSMC-6'
  7. 'MES-3'
  8. 'VSMC-3'
  9. 'MES-4'
  10. 'VSMC-2'
  11. 'VSMC-1'
  12. 'VSMC-4'
  13. 'VSMC-5'
  14. 'Mural'
  15. 'MES-1'
  16. 'MLA'
Registered S3 method overwritten by 'treeio':
  method     from
  root.phylo ape 

ggtree v2.0.4  For help: https://yulab-smu.github.io/treedata-book/

If you use ggtree in published research, please cite the most appropriate paper(s):

- Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
- Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628



myColors¶

In [35]:
myColors[1:12]
  1. '#E41A1C'
  2. '#377EB8'
  3. '#4DAF4A'
  4. '#984EA3'
  5. '#FF7F00'
  6. '#A65628'
  7. '#F781BF'
  8. '#999999'
  9. '#66C2A5'
  10. '#FC8D62'
  11. '#8DA0CB'
  12. '#BEBADA'
In [36]:
# names(myColors) <- as.character(1:17)
In [37]:
tr$tip.label <- factor(tr$tip.label, levels = c('MES-1','MES-2','MES-3','MES-4','VSMC-1','VSMC-2','VSMC-3',
                       'VSMC-4','VSMC-5','VSMC-6','VSMC-7','VSMC-8','Neuron','SWN','MLA','Mural'))
In [38]:
# tr$tip.label <- factor(1:17, levels = 1:17)
In [39]:
myColors <- myColors[as.numeric(tr$tip.label)]
In [40]:
tr$tip.label <- as.character(tr$tip.label)
In [41]:
options(repr.plot.width=8, repr.plot.height=7)
display.brewer.all()
In [42]:
# all gennes
options(repr.plot.width=12, repr.plot.height=3)
ggtree(tr, branch.length="none", linetype = "solid", size = 1) +
  # theme_tree2() +
  # vjust: + -> down; hjust: + -> left
  geom_tiplab(size=6, color=myColors, align=F, hjust = 0.5, vjust = 2, angle = 30, fontface = "bold") + # normal hjust = 0.5, vjust = 1.5
  #scale_y_continuous(limits = c(0, 5)) +
  coord_flip() +
  scale_x_reverse(expand = c(0,3)) +
  labs(x = "", y = "", title = "Lineage Cladogram") +
  theme(plot.title = element_text(hjust = 0.5, vjust = -7, face = "bold", size=20))
Warning message:
“`data_frame()` is deprecated as of tibble 1.1.0.
Please use `tibble()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.”
In [43]:
ggsave(filename = "Figures/integrated.Lineage.Cladogram.pdf", width = 12, height = 3)
In [692]:
# all gennes
options(repr.plot.width=9, repr.plot.height=9)
ggtree(tr, linetype = "solid", size = 1, layout="daylight", branch.length = 'none') + # 
  # theme_tree2() +
  # vjust: + -> down; hjust: + -> left
  geom_tiplab(size=6, color=myColors, align=F, angle = 0, vjust = 0, fontface = "bold") +
  #scale_y_continuous(limits = c(0, 5)) +
  coord_flip() +
  scale_x_reverse(expand = c(0,3)) +
  labs(x = "", y = "", title = "Lineage Cladogram") +
  theme(plot.title = element_text(hjust = 0.5, vjust = -4, face = "bold", size=20))
Average angle change [1] 0.112445406660657

Average angle change [2] 0.0357877565782561

velocity¶

In [1]:
print(load("all.supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined"
In [7]:
table(cncc.combined$split)
Elly_E13.5 Elly_E14.0 zhou_E10.5 zhou_E11.5 zhou_E12.5 zhou_E13.5 zhou_E14.5 
      6564       1991       2806       7763       6178       6014       6682 
zhou_E17.5    zhou_P1    zhou_P7 
      4341       4560       4775 
In [9]:
print(load("all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"     
In [10]:
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "split") +
        geom_vline(xintercept = -7) +
        geom_hline(yintercept = 5.4)
# p
all.umap <- p$data
Warning message:
“Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
Please use `as_label()` or `as_name()` instead.
This warning is displayed once per session.”
In [11]:
head(all.umap)
A data.frame: 6 × 4
UMAP_1UMAP_2identsplit
<dbl><dbl><fct><chr>
vcl_CCTTCGACACCAACCG-4.80720965 -1.920804VSMC-7Elly_E13.5
vcl_CTCTAATTCATAGCAC-5.10676066 -2.119910VSMC-7Elly_E13.5
vcl_CAGAGAGCAGCGAACA 4.45797499-11.141129SWN Elly_E13.5
vcl_GCATGCGTCTGCCCTA-0.03043394 -8.702329NeuronElly_E13.5
vcl_AATCCAGGTCGAACAG 0.95436476 5.041812MES-2 Elly_E13.5
vcl_CATCAAGTCTGTCAAG-0.60639481 -3.524776VSMC-8Elly_E13.5
In [12]:
# all.umap <- subset(all.umap, UMAP_1> -7 & UMAP_2 < 5.4)
In [13]:
all.umap <- subset(all.umap, UMAP_2 > -6)
In [14]:
all.umap <- subset(all.umap, !(UMAP_1>2 & UMAP_2<0))
In [15]:
ggplot(all.umap, aes(x=UMAP_1, y=UMAP_2)) +
    geom_point() +
    geom_vline(xintercept = 2) +
    geom_hline(yintercept = 0) +
    geom_hline(yintercept = -6)
# plot(all.umap$UMAP_1, all.umap$UMAP_2)
In [16]:
# all.umap <- subset(all.umap, ident %in% 1:12)
In [17]:
# all.umap <- subset(all.umap, !predict %in% c("c2","c3"))
In [18]:
head(all.umap)
A data.frame: 6 × 4
UMAP_1UMAP_2identsplit
<dbl><dbl><fct><chr>
vcl_CCTTCGACACCAACCG-4.8072097-1.920804VSMC-7Elly_E13.5
vcl_CTCTAATTCATAGCAC-5.1067607-2.119910VSMC-7Elly_E13.5
vcl_AATCCAGGTCGAACAG 0.9543648 5.041812MES-2 Elly_E13.5
vcl_CATCAAGTCTGTCAAG-0.6063948-3.524776VSMC-8Elly_E13.5
vcl_CAGAGAGCAGCTGGCT-2.3134097-1.675376VSMC-6Elly_E13.5
vcl_ACGGGTCTCAACACGT-3.7033431-3.059830VSMC-6Elly_E13.5
In [722]:
save(all.umap, file = "all.umap.rmCC.Rdata")
In [19]:
all.umap$cluster <- all.umap$ident
all.umap <- all.umap[grep("VSMC|MES", all.umap$cluster),]
In [20]:
all.umap.elly <- all.umap[grep("ct2|vcl", rownames(all.umap)),]
all.umap.elly$rawName <- rownames(all.umap.elly)
In [22]:
tmp.count <- as.matrix(cncc.combined.sub@assays$RNA@counts[,all.umap.elly$rawName])
In [28]:
write.table(t(tmp.count), quote = F, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/data_prep/vcl/E13.5_vcl.tsv", sep="\t")
In [27]:
write.table(rownames(tmp.count), quote = F, row.names = F, col.names = F, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/data_prep/vcl/genes.txt", sep="\t")
In [29]:
dim(tmp.count)
  1. 21064
  2. 5683
In [42]:
tmp.df <- read.csv("/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/Elly/color_data_gene_sets.csv", header = F, row.names = 1)
Warning message in read.table(file = file, header = header, sep = sep, quote = quote, :
“incomplete final line found by readTableHeader on '/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/Elly/color_data_gene_sets.csv'”
In [43]:
dim(tmp.df)
  1. 2
  2. 5683
In [121]:
all.umap.elly$group <- "Vcl cKO"
In [122]:
all.umap.elly[grep("ct", all.umap.elly$rawName),]$group <- "Control"
In [53]:
head(all.umap.elly)
A data.frame: 6 × 7
UMAP_1UMAP_2identsplitclusterrawNamegroup
<dbl><dbl><fct><chr><fct><chr><chr>
vcl_CCTTCGACACCAACCG-4.8072097-1.920804VSMC-7Elly_E13.5VSMC-7vcl_CCTTCGACACCAACCGVclcKO
vcl_CTCTAATTCATAGCAC-5.1067607-2.119910VSMC-7Elly_E13.5VSMC-7vcl_CTCTAATTCATAGCACVclcKO
vcl_AATCCAGGTCGAACAG 0.9543648 5.041812MES-2 Elly_E13.5MES-2 vcl_AATCCAGGTCGAACAGVclcKO
vcl_CATCAAGTCTGTCAAG-0.6063948-3.524776VSMC-8Elly_E13.5VSMC-8vcl_CATCAAGTCTGTCAAGVclcKO
vcl_CAGAGAGCAGCTGGCT-2.3134097-1.675376VSMC-6Elly_E13.5VSMC-6vcl_CAGAGAGCAGCTGGCTVclcKO
vcl_ACGGGTCTCAACACGT-3.7033431-3.059830VSMC-6Elly_E13.5VSMC-6vcl_ACGGGTCTCAACACGTVclcKO
In [45]:
# tmp.df <- rbind(tmp.df, as.character(all.umap.elly$cluster), all.umap.elly$split)
In [47]:
# rownames(tmp.df) <- c("Total Counts","Uniform","Cluster","Split")
In [54]:
# tmp.df[1:4,1:5]
In [57]:
write.table(all.umap.elly, quote = F,sep = ",",row.names = T,col.names = T, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/cellanno.csv")

SPRING¶

In [90]:
spring <- read.csv("coordinates.txt", header = F)
In [91]:
all.umap.elly$SPRING1 <- spring$V2
In [92]:
all.umap.elly$SPRING2 <- spring$V3
In [96]:
head(all.umap.elly)
A data.frame: 6 × 9
UMAP_1UMAP_2identsplitclusterrawNamegroupSPRING1SPRING2
<dbl><dbl><fct><chr><fct><chr><chr><dbl><dbl>
vcl_CCTTCGACACCAACCG-4.8072097-1.920804VSMC-7Elly_E13.5VSMC-7vcl_CCTTCGACACCAACCGVclcKO1017.8566 95.23537
vcl_CTCTAATTCATAGCAC-5.1067607-2.119910VSMC-7Elly_E13.5VSMC-7vcl_CTCTAATTCATAGCACVclcKO 754.9424349.01373
vcl_AATCCAGGTCGAACAG 0.9543648 5.041812MES-2 Elly_E13.5MES-2 vcl_AATCCAGGTCGAACAGVclcKO1018.6995 73.98390
vcl_CATCAAGTCTGTCAAG-0.6063948-3.524776VSMC-8Elly_E13.5VSMC-8vcl_CATCAAGTCTGTCAAGVclcKO 494.7210669.97602
vcl_CAGAGAGCAGCTGGCT-2.3134097-1.675376VSMC-6Elly_E13.5VSMC-6vcl_CAGAGAGCAGCTGGCTVclcKO1032.1202172.69868
vcl_ACGGGTCTCAACACGT-3.7033431-3.059830VSMC-6Elly_E13.5VSMC-6vcl_ACGGGTCTCAACACGTVclcKO 798.3132514.97958
In [1]:
print(load("all.umap.elly.SPRING.Rdata"))
[1] "all.umap.elly"   "all.umap.elly.b"
In [6]:
centers <- all.umap.elly %>% dplyr::group_by(cluster) %>% summarize(SPRING1 = median(SPRING1), 
            SPRING2 = median(SPRING2))
In [7]:
library(ggrepel)
In [8]:
table(all.umap.elly$group)
Control Vcl cKO 
   1385    4298 
In [9]:
all.umap.elly.b <- rbind(subset(all.umap.elly, group=="Control"),
                         subset(all.umap.elly, group=="Vcl cKO")[sample(1:4298, 1385),])
In [17]:
options(repr.plot.width=8, repr.plot.height=5)
pcag <- ggplot(all.umap.elly.b, aes(x=SPRING1, y=-SPRING2, color=cluster)) +
    # facet_grid(cols = vars(variable)) +
    facet_wrap( ~ group, ncol=2) + # error in border
    geom_point(size=1, alpha=1) +
    # geom_density_2d(color='black', size=0.05, alpha=0.15) +
    geom_text_repel(data = centers, mapping = aes(label = cluster), size = 4.5, color="black", fontface = "bold") +
    # geom_line(data=pc.line1, color='red', size=0.5) +
    labs(x = "SPRING Dim-1",y = "SPRING Dim-2", title = "") +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(legend.title=element_blank()) +
    theme(legend.position = "none") +
    theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
    theme(strip.placement = "outside", strip.text.x = element_text(face="plain", size = 14), #italic
          strip.text.y = element_text(face="plain", size = 11)) +
    theme(panel.spacing=unit(.3, "lines"),panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) + #line size
    scale_color_manual(values=myColors)
pcag
In [18]:
ggsave(filename = "SPRING.vcl.control.pdf", width = 8, height = 5)
In [ ]:

In [134]:
save(all.umap.elly, all.umap.elly.b, file = "/home/lizhixin/project/Data_center/analysis/all.umap.elly.SPRING.Rdata")
In [133]:
getwd()
'/home/lizhixin/project/Data_center/analysis'

Vcl lineage disruption¶

In [695]:
table(cncc.combined.sub@active.ident, cncc.combined.sub$stage)
        
         E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5   P1   P7
  MES-1    193  1335   637  1039    42   399    63   76   77
  MES-2    168   586   401  1219    73   593   211  261  568
  MES-3    159   625   622  1554    67  1284   224  205  218
  MES-4     84   548   215   330    20   135    21   19   47
  VSMC-1   179   399   346   543    75   310   368  294  180
  VSMC-2   609   801   850   809   211   221   112  118   66
  VSMC-3   108   528   347   791   156  1062  1174 1820 1803
  VSMC-4    49   211   214   241    58   301   393  328  179
  VSMC-5   103   282   227   375    58   170   162  132   91
  VSMC-6    54   175   337  2132   407   193   295  481  349
  VSMC-7    48   510   826  1210   227   285   199  224  282
  VSMC-8   416   914   736  1089   215   145    14   31    5
  Neuron     7     5    40   493   180   382    37    6    7
  SWN       80    20   101   349   162   346   126  133  436
  MLA        0    17     4    43     3   193    52   34   29
  Mural     11    15    19   124     3    73   326   42   30
In [49]:
tmp.df <- cncc.combined.sub@meta.data
tmp.df <- tmp.df[grep("ct2|vcl", tmp.df$cellName),]
In [50]:
head(tmp.df)
A data.frame: 6 × 39
orig.identnCount_RNAnFeature_RNAstagecellNamelabsplitpercent.mtintegrated_snn_res.0.7seurat_clusters⋯c15_nc16_nc17_nc18_nc19_nc20_nordered_raw_clustercluster_numberordered_raw_cluster_finalEMBO_anno
<chr><dbl><int><chr><chr><chr><chr><dbl><fct><fct>⋯<dbl><dbl><dbl><dbl><dbl><dbl><fct><fct><fct><fct>
vcl_CCTTCGACACCAACCGvcl 60161999E13.5vcl_CCTTCGACACCAACCGEllyElly_E13.53.4740695 5 ⋯ 0.017746340-0.3405605 0.02470102-0.002038496-0.13094563-0.20288788111211VSMC-7
vcl_CTCTAATTCATAGCACvcl 74892688E13.5vcl_CTCTAATTCATAGCACEllyElly_E13.53.3782885 5 ⋯ 0.064430912-0.3335513 0.07300808-0.375260891-0.13539847-0.07655114111211VSMC-7
vcl_CAGAGAGCAGCGAACAvcl227145129E13.5vcl_CAGAGAGCAGCGAACAEllyElly_E13.53.2490971111⋯ 0.011794468-0.3477211 0.15103338-0.033179337-0.22253936 0.20032665141414SWN
vcl_GCATGCGTCTGCCCTAvcl538397200E13.5vcl_GCATGCGTCTGCCCTAEllyElly_E13.52.9365331414⋯-0.028763796-0.2270507 1.34551991-0.050874703 0.03675817 0.22297381131313Neuron
vcl_AATCCAGGTCGAACAGvcl102902985E13.5vcl_AATCCAGGTCGAACAGEllyElly_E13.54.0427603 3 ⋯ 0.001014918 0.1127407-0.11073191 0.105957097 0.28313209 0.018249232 1 2 MES-2
vcl_CATCAAGTCTGTCAAGvcl 81422842E13.5vcl_CATCAAGTCTGTCAAGEllyElly_E13.57.5902737 7 ⋯ 0.045419791-0.1312454 0.02235031-0.229241711-0.03896710-0.18024788121012VSMC-8
In [60]:
write.table(table(tmp.df$EMBO_anno, tmp.df$orig.ident), file = "tmp.table.txt", sep = "\t")

percentage barplot¶

In [52]:
library(dplyr)
countTable <- tmp.df %>% dplyr::group_by(orig.ident) %>% dplyr::count(EMBO_anno) %>% dplyr::mutate(prop = n/sum(n))
In [53]:
subset(countTable, EMBO_anno=="VSMC-3")
A grouped_df: 2 × 4
orig.identEMBO_annonprop
<chr><fct><int><dbl>
ct2VSMC-3 640.03975155
vclVSMC-32260.04619787
In [54]:
head(countTable)
A grouped_df: 6 × 4
orig.identEMBO_annonprop
<chr><fct><int><dbl>
ct2MES-1 220.013664596
ct2MES-2 690.042857143
ct2MES-3 50.003105590
ct2MES-4 80.004968944
ct2VSMC-1 710.044099379
ct2VSMC-22470.153416149
In [55]:
options(repr.plot.width=3.5, repr.plot.height=6)
library(scales)
g <- ggplot(data=countTable, aes(x=orig.ident, y=prop, fill=EMBO_anno)) +
  geom_bar(stat="identity", position="fill", alpha=1) +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  labs(x = "",y = "Percentage of Population (%)", title = " ") + 
  theme(axis.text.x  = element_text(face="plain", angle=30, size = 15, color = "black", vjust=0.5),
        axis.text.y  = element_text(face="plain", size = 15, color = "black"),
        axis.title =element_text(size = 15)) +
  scale_y_continuous(labels = percent_format(suffix="")) +
  # coord_flip() +
  # change legend position and size
  theme(legend.title=element_blank(), axis.ticks.x=element_blank()) +
  # guides(colour = guide_legend(override.aes = list(size=3))) +
  #
  #geom_text(aes(label = scales::percent(prop, accuracy = 0.1), 
  #                y = text.y, group = stage), position = position_dodge(width = 0.9), vjust = 0.5, size=5) +
  scale_fill_manual(values=myColors, guide = guide_legend(reverse=F)) +
  scale_x_discrete(labels=c("ct2" = "Control", "vcl" = "Vcl cKO"))
g
In [61]:
ggsave(filename = "Figures/integrated.percentage.pdf", width = 3.5, height = 6)

DEG 1by1¶

In [279]:
cluster.list <- list("DE1"=c("VSMC-2","VSMC-8"), "DE2"=c("VSMC-4"), "DE3"=c("MES-2"))
In [280]:
ident.1 = "Vcl cKO" 
ident.2 = "Control"
In [281]:
# prepare group
tmp.group <- rep("Control", length.out = length(cncc.combined.sub@active.ident))
names(tmp.group) <- names(cncc.combined.sub@active.ident)
tmp.group[grepl("vcl", names(tmp.group))] <- "Vcl cKO"
tmp.group <- factor(tmp.group, levels = c("Control", "Vcl cKO"))
cncc.combined.sub$group <- tmp.group
table(cncc.combined.sub$group)
Control Vcl cKO 
  43007    4892 
In [282]:
# prepare cluster
cncc.combined.sub$cluster <- as.character(cncc.combined.sub@active.ident)
table(cncc.combined.sub$cluster)
 MES-1  MES-2  MES-3  MES-4    MLA  Mural Neuron    SWN VSMC-1 VSMC-2 VSMC-3 
  3861   4080   4958   1419    375    643   1157   1753   2694   3797   7789 
VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8 
  1974   1600   4423   3811   3565 
In [283]:
seuratObj <- subset(cncc.combined.sub, subset = split=="Elly_E13.5")
In [354]:
table(seuratObj$group, seuratObj$cluster)
         
          MES-1 MES-2 MES-3 MES-4  MLA Mural Neuron  SWN VSMC-1 VSMC-2 VSMC-3
  Control    22    69     5     8   11     2    139   61     71    247     64
  Vcl cKO    37   154    40    33    8    16    273  257    211    464    226
         
          VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8
  Control     36     41    392    161    281
  Vcl cKO     34    198   1525    773    643
In [341]:
# Vcl.DEGs <- DEG.cluster.list(seuratObj, cluster.list)
In [355]:
DEG.1by1 <- function(seuratObj, ident.1 = "Vcl cKO", ident.2 = "Control", assay = "RNA") {
    DEGs <- list()
    for (i in unique(seuratObj@active.ident)) {
        message(sprintf("identifying DEGs in %s between %s and %s...", i, ident.1, ident.2))
        # add log2FC and correlation
        cells_1 <- rownames(subset(seuratObj@meta.data, cluster==i & group==ident.2))
        cells_2 <- rownames(subset(seuratObj@meta.data, cluster==i & group==ident.1))
        if (length(cells_1)<5 | length(cells_2)<5) next
        log2fc <- log2(rowMeans(seuratObj@assays$RNA@data[,cells_2])+1) - log2(rowMeans(seuratObj@assays$RNA@data[,cells_1])+1)
        corM <- cor(t(as.matrix(seuratObj@assays$RNA@data[,c(cells_1,cells_2)])), c(rep(0, length.out = length(cells_1)),
                                                                       rep(1, length.out = length(cells_2))
                                                                       ))
        corM[is.na(corM)] <- 0
        #
        tmp.seuratObj <- subset(seuratObj, subset = cluster == i)
        tmp.seuratObj@active.ident <- tmp.seuratObj$group
        tmp.DEGs <- FindMarkers(tmp.seuratObj, ident.1 = ident.1, ident.2 = ident.2, only.pos = F, 
                                min.pct = 0, min.diff.pct = "-Inf", logfc.threshold = 0, assay = assay)
        if(dim(tmp.DEGs)[1]<1) {
          message(sprintf("skipping %s, no DEGs found...", i))
          next
        }
        DEGs[[i]] <- add.missing.DEGs(tmp.DEGs, rownames(tmp.seuratObj@assays$RNA@counts))
        # fill emplty genes
        tmp.empty <- rowSums(DEGs[[i]])==0
        DEGs[[i]][tmp.empty,]$p_val <- 1
        DEGs[[i]][tmp.empty,]$p_val_adj <- 1
        # add gene column
        DEGs[[i]]$gene <- rownames(DEGs[[i]])
        # write to df
        DEGs[[i]]$log2FC <- log2fc[DEGs[[i]]$gene]
        DEGs[[i]]$correlation <- as.data.frame(corM)[DEGs[[i]]$gene,]
        #
    }
    return(DEGs)
}
In [356]:
Vcl.DEGs <- DEG.1by1(seuratObj, ident.1 = "Vcl cKO", ident.2 = "Control")
identifying DEGs in VSMC-7 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in SWN between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in Neuron between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in MES-2 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-8 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-6 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in MES-3 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-3 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in MES-4 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-2 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-1 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-4 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in VSMC-5 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in Mural between Vcl cKO and Control...

identifying DEGs in MES-1 between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
identifying DEGs in MLA between Vcl cKO and Control...

Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), :
“the standard deviation is zero”
In [357]:
Vcl.DEGs.sig <- list()
for (i in names(Vcl.DEGs)) {
    print(i)
    Vcl.DEGs.sig[[i]] <- subset(Vcl.DEGs[[i]], p_val<0.05 & abs(log2FC)>0.01 & abs(correlation)>0.15 & log2FC*correlation>0)
    print(dim(Vcl.DEGs.sig[[i]]))
}
[1] "VSMC-7"
[1] 275   8
[1] "SWN"
[1] 599   8
[1] "Neuron"
[1] 1133    8
[1] "MES-2"
[1] 1338    8
[1] "VSMC-8"
[1] 414   8
[1] "VSMC-6"
[1] 240   8
[1] "MES-3"
[1] 1201    8
[1] "VSMC-3"
[1] 844   8
[1] "MES-4"
[1] 1194    8
[1] "VSMC-2"
[1] 764   8
[1] "VSMC-1"
[1] 1023    8
[1] "VSMC-4"
[1] 1322    8
[1] "VSMC-5"
[1] 898   8
[1] "MES-1"
[1] 1113    8
[1] "MLA"
[1] 526   8
In [358]:
# save(Vcl.DEGs, Vcl.DEGs.sig, file = "keyRdata/Vcl.DEGs.Rdata")
In [359]:
save(Vcl.DEGs, Vcl.DEGs.sig, file = "keyRdata/Vcl.DEGs.all.clusters.Rdata")
In [306]:
# DEGs[[1]][order(DEGs[[1]]$p_val_adj, decreasing = F),]
In [308]:
# Vcl.DEGs[[1]][order(Vcl.DEGs[[1]]$p_val_adj, decreasing = F),]
In [1]:
print(load("keyRdata/Vcl.DEGs.all.clusters.Rdata"))
[1] "Vcl.DEGs"     "Vcl.DEGs.sig"
In [2]:
length(names(Vcl.DEGs))
15
In [3]:
names(Vcl.DEGs)
  1. 'VSMC-7'
  2. 'SWN'
  3. 'Neuron'
  4. 'MES-2'
  5. 'VSMC-8'
  6. 'VSMC-6'
  7. 'MES-3'
  8. 'VSMC-3'
  9. 'MES-4'
  10. 'VSMC-2'
  11. 'VSMC-1'
  12. 'VSMC-4'
  13. 'VSMC-5'
  14. 'MES-1'
  15. 'MLA'
In [4]:
head(Vcl.DEGs[[1]])
A data.frame: 6 × 8
p_valavg_logFCpct.1pct.2p_val_adjgenelog2FCcorrelation
<dbl><dbl><dbl><dbl><dbl><chr><dbl><dbl>
Xkr40.65015867 0.00070149570.0010.0001Xkr4 0.0008085966 0.01494109
Gm19921.00000000 0.00000000000.0000.0001Gm1992 0.0000000000 0.00000000
Rp11.00000000 0.00000000000.0000.0001Rp1 0.0000000000 0.00000000
Sox171.00000000 0.00000000000.0000.0001Sox17 0.0000000000 0.00000000
Gm373231.00000000 0.00000000000.0000.0001Gm37323 0.0000000000 0.00000000
Mrpl150.06888887-0.02845932450.4580.6341Mrpl15 -0.0724652353-0.05565887

save xlsx¶

In [5]:
library(xlsx)
In [15]:
DEG.sig <- lapply(Vcl.DEGs, function(x) {
    subset(x, p_val<0.05 & abs(log2FC)>0.1)[,c("gene","p_val","p_val_adj","avg_logFC")]
})
In [16]:
lapply(DEG.sig, dim)
$`VSMC-7`
  1. 1262
  2. 4
$SWN
  1. 1038
  2. 4
$Neuron
  1. 1352
  2. 4
$`MES-2`
  1. 1337
  2. 4
$`VSMC-8`
  1. 1042
  2. 4
$`VSMC-6`
  1. 824
  2. 4
$`MES-3`
  1. 1109
  2. 4
$`VSMC-3`
  1. 1146
  2. 4
$`MES-4`
  1. 811
  2. 4
$`VSMC-2`
  1. 1126
  2. 4
$`VSMC-1`
  1. 1289
  2. 4
$`VSMC-4`
  1. 1102
  2. 4
$`VSMC-5`
  1. 1257
  2. 4
$`MES-1`
  1. 978
  2. 4
$MLA
  1. 509
  2. 4
In [17]:
out.file <- "cluster.specific.DEGs.xlsx"
tmp.list <- DEG.sig
sample.list <- unique(names(tmp.list))
for (i in sample.list) {
    print(i)
    tmp.df <- tmp.list[[i]]
    # tmp.df <- subset(tmp.df, pvalue<0.05)
    print(dim(tmp.df))
    if (i==sample.list[1]) {
        write.xlsx(tmp.df, file=out.file, sheetName=i, row.names=F)
    } else {
        write.xlsx(tmp.df, file=out.file, sheetName=i, append=TRUE, row.names=F)
    }
}
[1] "VSMC-7"
[1] 1262    4
[1] "SWN"
[1] 1038    4
[1] "Neuron"
[1] 1352    4
[1] "MES-2"
[1] 1337    4
[1] "VSMC-8"
[1] 1042    4
[1] "VSMC-6"
[1] 824   4
[1] "MES-3"
[1] 1109    4
[1] "VSMC-3"
[1] 1146    4
[1] "MES-4"
[1] 811   4
[1] "VSMC-2"
[1] 1126    4
[1] "VSMC-1"
[1] 1289    4
[1] "VSMC-4"
[1] 1102    4
[1] "VSMC-5"
[1] 1257    4
[1] "MES-1"
[1] 978   4
[1] "MLA"
[1] 509   4

check DEGs¶

In [200]:
print(load("keyRdata/Vcl.DEGs.Rdata"))
[1] "Vcl.DEGs"
In [335]:
tmp.genes <- "Rgs2"
Vcl.DEGs[[1]][tmp.genes,]
Vcl.DEGs[[2]][tmp.genes,]
Vcl.DEGs[[3]][tmp.genes,]
A data.frame: 1 × 8
p_valavg_logFCpct.1pct.2p_val_adjgenelog2FCcorrelation
<dbl><dbl><dbl><dbl><dbl><chr><dbl><dbl>
Rgs20.007397128-0.021199970.3690.471Rgs2-0.04750919-0.03766536
A data.frame: 1 × 8
p_valavg_logFCpct.1pct.2p_val_adjgenelog2FCcorrelation
<dbl><dbl><dbl><dbl><dbl><chr><dbl><dbl>
Rgs20.02279682-0.34241980.1760.4441Rgs2-0.2674717-0.2715347
A data.frame: 1 × 8
p_valavg_logFCpct.1pct.2p_val_adjgenelog2FCcorrelation
<dbl><dbl><dbl><dbl><dbl><chr><dbl><dbl>
Rgs20.02447955-0.27660560.2010.3481Rgs2-0.1746621-0.1504044
In [336]:
# tmp.genes <- "Lum"
tmp.expr <- cbind(score=seuratObj@assays$RNA@data[tmp.genes,], seuratObj@meta.data[,c("group","cluster")])
In [337]:
table(tmp.expr$group)
Control Vcl cKO 
   1610    4892 
In [338]:
# tmp.expr$group <- plyr::mapvalues(tmp.expr$group, from = c("vcl", "ct2"), to = c("Vcl cKO","Control"))
tmp.expr$group <- factor(tmp.expr$group, levels = c("Control","Vcl cKO"))
In [339]:
table(tmp.expr$group, tmp.expr$cluster)
         
          MES-1 MES-2 MES-3 MES-4  MLA Mural Neuron  SWN VSMC-1 VSMC-2 VSMC-3
  Control    22    69     5     8   11     2    139   61     71    247     64
  Vcl cKO    37   154    40    33    8    16    273  257    211    464    226
         
          VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8
  Control     36     41    392    161    281
  Vcl cKO     34    198   1525    773    643
In [340]:
options(repr.plot.width=5, repr.plot.height=4.2)
library(ggpubr)
p <- ggplot(data=tmp.expr, aes(x=cluster, y=score, fill=group)) +
        geom_boxplot() +
        theme_bw() +
        labs(title = tmp.genes,
             x = "Cluster", y = "Relative expression", fill = "") +
        # geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme(axis.text.x  = element_text(face="plain", angle=90, size = 16, color = "black", vjust=0.6),
                axis.text.y  = element_text(size = 10),
                axis.title = element_text(size = 16)) +
        # stat_compare_means(comparisons = list(c("c1","c2")), label.y = 0.2, label = "p.signif") +
        # scale_y_continuous(limits = c(-0.3, 0.25)) +
        scale_fill_manual(values=brewer.pal(9,"Paired")[c(2,8)]) 
p
In [221]:
ggsave(filename = paste("Figures/", tmp.genes, ".pdf", sep = ""), width = 5, height = 4.2, dpi = 800)

GO/KEGG¶

In [227]:
source("https://raw.githubusercontent.com/leezx/Toolsets/master/R/Toolsets.R")
source("https://raw.githubusercontent.com/leezx/Toolsets/master/R/Plot.R")
In [231]:
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("org.Mm.eg.db")
In [236]:
# subset(Vcl.DEGs[[2]], p_val<0.05 & p_val_adj<0.05)
In [360]:
tmp.list <- list()
for (i in names(Vcl.DEGs.sig)){
    tmp.list[[i]] <- rownames(Vcl.DEGs.sig[[i]])
}
In [364]:
DEGs.ora.out <- ora.go.kegg.clusterProfiler(geneList = tmp.list, organism="mm")
In [363]:
for (i in names(DEGs.ora.out$go_list)){
    write.csv(subset(DEGs.ora.out$go_list[[i]]@result, p.adjust<0.05 & qvalue<0.05 & Count>5), 
              file = paste(i,".csv",sep=""))
}
In [328]:
# write.csv(subset(DEGs.ora.out$go_list$c1@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c1.csv")
# write.csv(subset(DEGs.ora.out$go_list$c2@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c2.csv")
# write.csv(subset(DEGs.ora.out$go_list$c3@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c3.csv")
In [ ]:

check old Vcl¶

In [366]:
print(load("keyRdata/E13.5_CNCC_merged_updated.Rdata"))
[1] "seuset"   "markers"  "all_tsne"
In [462]:
seuset@active.ident <- factor(seuset@active.ident, levels = c("c1","c2","c3","c4","c5","c6"))
In [463]:
options(repr.plot.width=6, repr.plot.height=5)
p1 <- DimPlot(seuset, reduction = "tsne", label = TRUE, repel = TRUE, label.size = 6, cols = brewer.pal(8,"Set2"))
p1
In [418]:
tmp.cluster <- as.character(cncc.combined.sub@active.ident[names(seuset@active.ident)])
names(tmp.cluster) <- names(seuset@active.ident)
In [419]:
tmp.cluster[is.na(tmp.cluster)] <- " "
In [420]:
tmp.cluster[tmp.cluster=="Mural"] <- " "
In [421]:
tmp.cluster[tmp.cluster=="MLA"] <- " "
In [435]:
rev(sort(unique(tmp.cluster)))
  1. VSMC-8
  2. VSMC-7
  3. VSMC-6
  4. VSMC-5
  5. VSMC-4
  6. VSMC-3
  7. VSMC-2
  8. VSMC-1
  9. SWN
  10. Neuron
  11. MES-4
  12. MES-3
  13. MES-2
  14. MES-1
Levels:
  1. ' '
  2. 'MES-1'
  3. 'MES-2'
  4. 'MES-3'
  5. 'MES-4'
  6. 'Neuron'
  7. 'SWN'
  8. 'VSMC-1'
  9. 'VSMC-2'
  10. 'VSMC-3'
  11. 'VSMC-4'
  12. 'VSMC-5'
  13. 'VSMC-6'
  14. 'VSMC-7'
  15. 'VSMC-8'
In [436]:
tmp.cluster <- factor(tmp.cluster, levels =  c('MES-1','MES-2','MES-3','MES-4','VSMC-1','VSMC-2','VSMC-3',
                       'VSMC-4','VSMC-5','VSMC-6','VSMC-7','VSMC-8','Neuron','SWN',' ')) # 'MLA','Mural'
In [437]:
seuset$integrated_cluster <- tmp.cluster
In [438]:
# seuset$integrated_cluster_full <- cncc.combined$EMBO[names(seuset@active.ident)]
In [439]:
table(seuset$integrated_cluster, seuset@active.ident)
        
           c6   c5   c4   c1   c2   c3
  MES-1    13    1    1   40    2    0
  MES-2     5  133   58   20    6    1
  MES-3    38    0    5    0    0    1
  MES-4     4    6    0   30    1    0
  VSMC-1    7   47   18  194    3    5
  VSMC-2    0   12  130  314   33  179
  VSMC-3  122  133   11   17    1    3
  VSMC-4   33   20    5    8    0    0
  VSMC-5   20   64   42  109    2    2
  VSMC-6    1 1013  848   25    1   16
  VSMC-7   34  148  740    1    0    3
  VSMC-8    0   23  799   61    5   31
  Neuron    1    2    7    0  134  222
  SWN       0    1    4    0  252   26
            7   31   30    8   15    5
In [464]:
options(repr.plot.width=6, repr.plot.height=5)
p2 <- DimPlot(seuset, reduction = "tsne", cols = myColors[1:15],
              label = T, repel = TRUE, label.size = 4, pt.size = 0.5, group.by = "integrated_cluster")
p2
In [465]:
options(repr.plot.width=12, repr.plot.height=5)
cowplot::plot_grid(p1,p2,ncol = 2)
In [466]:
ggsave(filename = "Figures/comparison_before_after_integration.tSNE.pdf", width = 12, height = 5, dpi = 800)
In [ ]:

In [ ]:

new UMAP¶

In [447]:
cncc.combined.sub.elly <- subset(cncc.combined.sub, subset = cellName %in% colnames(seuset))
In [448]:
cncc.combined.sub.elly$old_cluster <- seuset@active.ident[colnames(cncc.combined.sub.elly)]
In [454]:
cncc.combined.sub.elly$old_cluster <- factor(cncc.combined.sub.elly$old_cluster , levels = c("c1","c2","c3","c4","c5","c6"))
In [458]:
options(repr.plot.width=6, repr.plot.height=5)
p2 <- DimPlot(cncc.combined.sub.elly, reduction = "umap", cols = brewer.pal(8,"Set2"),
              label = T, repel = TRUE, label.size = 4, pt.size = 0.5, group.by = "old_cluster")
p2
In [459]:
options(repr.plot.width=12, repr.plot.height=5)
cowplot::plot_grid(p1,p2,ncol = 2)
In [460]:
ggsave(filename = "Figures/comparison_before_after_integration.UMAP.pdf", width = 12, height = 5, dpi = 800)
In [ ]:

In [ ]:

facet UMAP¶

In [132]:
# seuset <- RunPCA(seuset, features = unique(c(VariableFeatures(object = cncc.combined), cncc.markers$gene)))
In [133]:
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "predict") +
        geom_vline(xintercept = -7) +
        geom_hline(yintercept = 5.4)
# p
all.umap <- p$data
In [235]:
dim(seuset)
  1. 16342
  2. 6393
In [135]:
all.umap.elly <- subset(all.umap, grepl("vcl|ct", rownames(all.umap)))
In [219]:
table(subset(all.umap, grepl("E12.5", rownames(all.umap)))$ident)
   CNCC     MES    VSMC  Neuron     SWN     MLA   Mural Unknown 
   2065    1182    2517      32     109       4      18     289 
In [220]:
table(subset(all.umap, grepl("E13.5", rownames(all.umap)))$ident)
   CNCC     MES    VSMC  Neuron     SWN     MLA   Mural Unknown 
   1581    2755    1277      67      37      24      98     201 
In [221]:
table(subset(all.umap, grepl("E14.5", rownames(all.umap)))$ident)
   CNCC     MES    VSMC  Neuron     SWN     MLA   Mural Unknown 
   1083    2009    2047     328     382     182      76     598 
In [137]:
all.umap.elly$group <- "Control"
all.umap.elly[grepl("vcl", rownames(all.umap.elly)),]$group <- "Vcl cKO"
In [141]:
head(all.umap.elly)
table(all.umap.elly$group)
A data.frame: 6 × 5
UMAP_1UMAP_2identpredictgroup
<dbl><dbl><fct><chr><chr>
vcl_CCTTCGACACCAACCG -2.612578-5.0383182VSMCc5Vcl cKO
vcl_CTCTAATTCATAGCAC -2.160392-5.5188604VSMCc4Vcl cKO
vcl_CAGAGAGCAGCGAACA-12.680368-3.0148230SWN c2Vcl cKO
vcl_AATCCAGGTCGAACAG 3.421771 0.8463258MES c5Vcl cKO
vcl_CATCAAGTCTGTCAAG -4.815358-2.0471276VSMCc4Vcl cKO
vcl_CAGAGAGCAGCTGGCT 4.433164 0.1995236MES c5Vcl cKO
Control Vcl cKO 
   1523    4870 
In [144]:
set.seed(49)
all.umap.elly.b <- rbind(subset(all.umap.elly, group=="Control"),
                         subset(all.umap.elly, group=="Vcl cKO")[sample(1:4870, 1523),]
                        )
In [147]:
options(repr.plot.width=9, repr.plot.height=5)
g1 <- ggplot(all.umap.elly.b, aes(x=UMAP_1, y=UMAP_2, color=ident)) +
    # facet_wrap( ~group , ncol=3) +
    facet_grid(cols = vars(group)) +
    geom_point(size=0.8, alpha=1) +
    geom_density_2d(color='black', size=0.1, alpha=0.2) +
    # geom_text(data = facet_centers, mapping = aes(label = merged_cluster), size = 4, color="black") +
    labs(x = "UMAP-1",y = "UMAP-2", title = "") +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    # change legend
    theme(legend.title=element_blank()) + # legend.position = "none", 
    guides(colour = guide_legend(override.aes = list(size=5))) +
    theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
    # change strip, 17 and bold
    theme(strip.placement = "outside", strip.text.x = element_text(face="bold", size = 17), #italic
          strip.text.y = element_text(face="plain", size = 11)) +
    theme(panel.spacing=unit(.3, "lines"),
          panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) + 
    # change xy axis and title text size
    # axis test: 10
    # axis title: 14, with vjust or hjust 
    theme(axis.text  = element_text(face="plain", angle=0, size = 10, color = "black"),
            axis.title.y =element_text(size = 14), axis.title.x =element_text(size = 14, vjust=-1)) +
    scale_color_manual(values=brewer.pal(8,"Set1"))
g1
In [148]:
table(all.umap.elly.b$group, all.umap.elly.b$ident)
         
          CNCC  MES VSMC Neuron  SWN  MLA Mural Unknown
  Control  309  104  927    103   62    8     1       9
  Vcl cKO  237   92 1035     71   76    2     4       6
In [149]:
all.umap.elly.b$full_cluster <- cncc.combined$EMBO[rownames(all.umap.elly.b)]
In [151]:
options(repr.plot.width=9, repr.plot.height=5)
g1 <- ggplot(all.umap.elly.b, aes(x=UMAP_1, y=UMAP_2, color=full_cluster)) +
    # facet_wrap( ~group , ncol=3) +
    facet_grid(cols = vars(group)) +
    geom_point(size=0.8, alpha=1) +
    geom_density_2d(color='black', size=0.1, alpha=0.2) +
    # geom_text(data = facet_centers, mapping = aes(label = merged_cluster), size = 4, color="black") +
    labs(x = "UMAP-1",y = "UMAP-2", title = "") +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    # change legend
    theme(legend.title=element_blank()) + # legend.position = "none", 
    guides(colour = guide_legend(override.aes = list(size=5))) +
    theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
    # change strip, 17 and bold
    theme(strip.placement = "outside", strip.text.x = element_text(face="bold", size = 17), #italic
          strip.text.y = element_text(face="plain", size = 11)) +
    theme(panel.spacing=unit(.3, "lines"),
          panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) + 
    # change xy axis and title text size
    # axis test: 10
    # axis title: 14, with vjust or hjust 
    theme(axis.text  = element_text(face="plain", angle=0, size = 10, color = "black"),
            axis.title.y =element_text(size = 14), axis.title.x =element_text(size = 14, vjust=-1)) +
    scale_color_manual(values=c(brewer.pal(10,"Set1"), brewer.pal(10,"Set1")))
g1
Warning message in brewer.pal(10, "Set1"):
“n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
”
Warning message in brewer.pal(10, "Set1"):
“n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
”
In [152]:
table(all.umap.elly.b$group, all.umap.elly.b$full_cluster)
         
          VSMC-3 MES-3 VSMC-4 MES-2 CNCC-1 CNCC-2 VSMC-1 CNCC-3 VSMC-5 Unknown
  Control    484     6     58    96     31    176    255    102      5       9
  Vcl cKO    633     8     34    78     18    150    209     69     33       6
         
          VSMC-2 SWN VSMC-7 MES-1 VSMC-6 Neuron Mural MLA
  Control     70  62     20     2     35    103     1   8
  Vcl cKO     95  76     10     6     21     71     4   2

monocle¶

In [154]:
library(monocle)
In [155]:
head(all.umap.elly.b)
A data.frame: 6 × 6
UMAP_1UMAP_2identpredictgroupfull_cluster
<dbl><dbl><fct><chr><chr><fct>
ct2_AAACCTGCAGTTAACC 2.7239356-6.111257VSMC c6ControlVSMC-7
ct2_AAACCTGGTTGACGTT-4.7029686-2.880406VSMC c5ControlVSMC-1
ct2_AAACGGGGTTCCACTC-0.8335619-3.484131VSMC c5ControlVSMC-3
ct2_AAACGGGTCAACCATG-2.8774552 1.587930VSMC c4ControlVSMC-2
ct2_AAACGGGTCCCACTTG-5.044719212.960110Neuronc2ControlNeuron
ct2_AAAGATGCACCAACCG-2.8675112-4.743443VSMC c4ControlVSMC-3
In [156]:
all_tsne <- subset(all.umap.elly.b, ident %in% c("CNCC","VSMC","MES"))
In [157]:
pd <- new("AnnotatedDataFrame", data = all_tsne)
fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name=rownames(seuset@assays$RNA@counts), 
                                                  row.names = rownames(seuset@assays$RNA@counts)))
In [158]:
cds <- newCellDataSet(as.matrix(seuset@assays$RNA@counts[,rownames(all_tsne)]), 
                      phenoData = pd, 
                      featureData = fd, 
                      expressionFamily=negbinomial.size())

cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)

cds <- detectGenes(cds, min_expr = 0.1)
print(head(fData(cds)))
Removing 168 outliers

        gene_short_name num_cells_expressed
Xkr4               Xkr4                   8
Mrpl15           Mrpl15                1705
Lypla1           Lypla1                1114
Tcea1             Tcea1                1733
Rgs20             Rgs20                  32
Atp6v1h         Atp6v1h                 803
In [159]:
cds <- detectGenes(cds, min_expr = 0.1)
print(head(fData(cds)))
expressed_genes <- row.names(subset(fData(cds), num_cells_expressed >= 10))
table(cds$ident)
        gene_short_name num_cells_expressed
Xkr4               Xkr4                   8
Mrpl15           Mrpl15                1705
Lypla1           Lypla1                1114
Tcea1             Tcea1                1733
Rgs20             Rgs20                  32
Atp6v1h         Atp6v1h                 803
   CNCC     MES    VSMC  Neuron     SWN     MLA   Mural Unknown 
    546     196    1962       0       0       0       0       0 
In [160]:
# time-consuming step
diff_test_res <- differentialGeneTest(cds[expressed_genes,], fullModelFormulaStr = "~ident")
# head(diff_test_res)
In [162]:
ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))
ordering_genes <- row.names (subset(diff_test_res, qval < 0.00001 & num_cells_expressed > 50 & num_cells_expressed < 1000))
length(ordering_genes)
1112
In [163]:
head(ordering_genes)
length(ordering_genes)
# # set my genes
# ordering_genes <- unique(markers$gene)
# length(ordering_genes)
  1. 'Mybl1'
  2. 'Tcf24'
  3. 'Eya1'
  4. 'Terf1'
  5. 'Rdh10'
  6. 'Gdap1'
1112
In [ ]:
cds <- setOrderingFilter(cds, ordering_genes)
In [185]:
tmp <- subset(cncc.markers, cluster %in% c("CNCC","MES","VSMC"))$gene
In [186]:
# VariableFeatures(object = cncc.combined), 
# cncc.markers$gene
cds <- setOrderingFilter(cds, unique(tmp))
In [187]:
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')
cds <- orderCells(cds)
In [188]:
table(cds$ident)
   CNCC     MES    VSMC  Neuron     SWN     MLA   Mural Unknown 
    546     196    1962       0       0       0       0       0 
In [189]:
options(repr.plot.width=5, repr.plot.height=5)
plot_cell_trajectory(cds, color_by = "ident", show_backbone = F, show_tree = T, 
                     state_number_size = 0, show_branch_points = F) +
    scale_color_manual(values = brewer.pal(8,"Set2")[c(1,4,6)])
In [240]:
save(cds, file = "monocle.trajectory.CNCC.MES.VSMC.Rdata")

check YFP for CNCC¶

In [171]:
tmp.df <- Read10X("/home/lizhixin/project/scRNA-seq/rawData/10x/Vcl-YFP-CNCC_report_YFP/outs/filtered_feature_bc_matrix")
In [1]:
# tmp.df <- Read10X("/home/lizhixin/project/scRNA-seq/rawData/10x/Vcl-YFP-CNCC_report/outs/filtered_gene_bc_matrices/mm10/")
In [179]:
dim(tmp.df)
  1. 27998
  2. 8563
In [172]:
dim(tmp.df)
  1. 31054
  2. 12959
In [173]:
options(repr.plot.width=4, repr.plot.height=4)
hist(tmp.df["YFP",], breaks = 10, xlab = "YFP expression", main = "Detection of YFP in \nVcl-YFP-CNCC")
In [180]:
3276 / (3276 + 9683)
0.252797283741029
In [174]:
table(tmp.df["YFP",]>0)
FALSE  TRUE 
 3276  9683 
In [169]:
table(tmp.df["YFP",]>0)
FALSE  TRUE 
 4082  7478 
In [ ]: